/* 	* Created: 16/11/2012
	* Last Modified: 16/12/2016
	* Purpose: It supersedes MolaNAPP16v1 and corrects some No occupation, headchild stat. It merges and compares mola-points-original and mola-points-modified with census 1881, uses reclink to resolve 
			those observations that do not merge exactly, and generates the sample we use for the analysis.
	* DATASETS Used:
		Originals:
		- "$MOLA/ll_pl_pa_points.dta"			: MOLA original points
		- "$MOLA/ll_point1881_soc_civ_near"		: MOLA modified points, comes from shp2dta.do
		- "$MOLA/ll_point1881_soc_civ_nearplacebo"	: MOLA modified points with placebo social borders (change of 5degrees, x 30mts, y 25mts), comes from Placebo_creation.do
		Modified:
		- "$NAPP/NAPP_London3V1"	: NAPP only London (created by creationV1.do)
		
	* DOFILES used:
		- "$CODE/addressnames1to89V2.do" 
		- "$CODE/addressnames90to171.do"
		
	* DATASETS Created:
		- "$MOLA/molaoriginal_unique_JAG": MOLA original points with modified names and unique observations
		- "$MOLA/molamodified_unique_JAG": MOLA modified points first round with modified names and unique observations
		- "$NAPP/ParishesPARIDGB" : Dataset with code and name from civil parish of enumeration
		- "$NAPP/ParishesBPSTPAGB": Dataset with name from civil parish of birth 
		- "$NAPP/DictionaryParSubDisCou.dta": Dictionary of parish of birth linked to parish of enumeration code, subdistrict of enumeration, district of enumeration or county of enumeration
		- "$NAPP/NAPP_London4V1_JAG": uses NAPP_London3V1 and call DOFILES "$CODE/addressnames1to86Rclink.do" "$CODE/addressnames87to173Rclink.do"
		- "$NAPP/MolaOriginal_NAPP_JAG": Merge Mola original unique with census. 
		- "$NAPP/MolaModified_NAPP_JAG": Merges, by reclink, Mola modified with unique census. It contains some duplicates (due to the reclink command)
		- "$NAPP/MolaModified_NAPP_JAG2": Corrects MolaModified_NAPP_JAG so that there are no duplicates after reclink. It keeps both merged and non-merged observations
		- "/MolaModified_NAPP_JAG3new": uses "MolaModified_NAPP_JAG2". Keeps only merged and master observations, drops AGE>=999
		- /MolaModified_NAPP_JAG4new" Based on /MolaModified_NAPP_JAG3 but with occupation variables generated and dropping only master 
		- /MolaModified_NAPP_JAG4placebonew" Based on /MolaModified_NAPP_JAG4 but including placebo social and civil parishes
		- "$NAPP/MolaModified_NAPP_JAG3Anew" same vars as MolaModified_NAPP_JAG4 but keeping master and merged. It can be used to test unbalanced sample
		- /MolaModified_NAPP_ALL_JAG1new.xls"	: uses /MolaModified_NAPP_JAG4 and creates Excel file to be imported in ArCMap
		- "$NAPP/MolaModified_NAPP_Heads_JAG1_mapnew.dta" : uses "$NAPP/MolaModified_NAPP_JAG1.dta" and keeps only head of hh and merge it to maps this is the dataset we use for R analysis
		- "MolaModified_NAPP_Heads_JAG1new"	: uses $NAPP/MolaModified_NAPP_Heads_JAG1_map.dta and creates Excel file to be imported in ArCMap
		- "$NAPP/DictionaryParSubDisCouNEW.dta" 		Dictionary relating BPCCTYGB BPSTPAGB to PARIDGB SUBDIDGB RGDISTGB RGCNTYGB
		*/	
* ---------------------------------------------------------------------------
* Folder layout. Every path used below is built from the ROOT global.
* Alternative root kept for reference:
*   global ROOT "/Users/josealbertoguerra/Dropbox"
* ---------------------------------------------------------------------------
global ROOT "C:/Users/`c(username)'/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes"

* Do-files called by this script
global CODE "$ROOT/A_CREATING_SAMPLE"
* MOLA map-point datasets (inputs and cleaned outputs)
global MOLA "$ROOT/MOLA"
* Raw NAPP census extract
* NOTE(review): this folder is spelled NeworkParish2016 while ROOT uses NetworkParish2016 -- confirm the folder name on disk
global NAPPRaw "$ROOT/NeworkParish2016/Occupational_Choice/RawData/census1881/NAPP_address"
* Processed NAPP datasets (inputs and outputs)
global NAPP "$ROOT/NAPP"

* Session housekeeping: close any log left open and disable the -more- pause
capture log close
set more off

/*========================================*/
// 1. Rearrange MOLA's original addresses //
/*========================================*/

* ---------------------------------------------------------------------------
* mola_clean_points
*   Shared cleaning routine for the MOLA point files. Sections 1 and 2 used to
*   carry two copies of exactly these steps; they now both call this program.
*
*   Requires in memory: string variables -name- (parish) and -p_name- (address).
*   Creates: border, extremeborder, name2, new_p_name0, temp, new_p_name,
*            and renames -name- to -parish-.
*   On exit the data hold one observation per (name2, new_p_name) pair.
*   Empty addresses are NOT dropped here: section 2 needs to patch one address
*   between the de-duplication and that drop, so callers do it themselves.
* ---------------------------------------------------------------------------
capture program drop mola_clean_points
program define mola_clean_points

	// Map MOLA parish names onto the names used for the merge with the census
	replace name="All Hallows London Wall" if name=="All Hallows London"
	replace name="St Mary Abbots Kensington" if name=="Brompton"
	replace name="St Mary Lambeth" if name=="Lambeth Palace"
	replace name="Old Tower Without And Tower Of London" if name=="Liberty of the Old Tower"
	replace name="Old Tower Without And Tower Of London" if name=="Liberty of the Tower"
	replace name="St Luke Chelsea" if name=="St Anne Kensington"
	// disabled alternative: replace name="St Saviour Southwark" if name=="St George Southwark"
	replace name="St George The Martyr" if name=="St George Southwark"
	replace name="St James Clerkenwell" if name=="St John Clerkenwell"
	replace name="St Margaret Westminster" if name=="St John the Evangelist Westminster"
	replace name="St Botolph Aldgate" if name=="St Mary Matfellon"
	replace name="St Dunstan Stepney/Mile End" if name=="Mile End New Town"

	// Indicator for the parishes listed as border parishes.
	// inlist() accepts at most nine string candidates per call, hence three calls.
	// Brompton, Mile End New Town and St Anne Kensington were renamed just above,
	// so those entries can no longer match; they are kept to mirror the original list.
	gen border = inlist(name, "Battersea", "Bow", "Bromley St Leonard", "Brompton", "Camberwell", ///
			"St Dunstan Stepney/Mile End", "Mile End New Town", "Poplar", "St George Hanover Square") ///
		| inlist(name, "St James Clerkenwell", "St Leonard Shoreditch", "St Luke Chelsea", "St Luke Old Street", ///
			"St Margaret Westminster", "St Mary Abbots Kensington", "St Mary Lambeth", "St Mary Paddington", "St Mary Rotherhithe") ///
		| inlist(name, "St Marylebone", "St Matthew Bethnal Green", "St Nicholas Deptford", "St Pancras", ///
			"St Paul Deptford", "Greenwich", "St Anne Kensington", "Brompton", "Islington")

	// extremeborder = border, switched off for five of the border parishes
	gen extremeborder=border
	replace extremeborder=0 if inlist(name, "St George Hanover Square", "St Luke Old Street", ///
		"St Dunstan Stepney/Mile End", "Mile End New Town", "St Marylebone")

	// Upper-case parish key (merge key on the MOLA side); keep the mixed-case name as -parish-
	gen name2 = upper(name)
	rename name parish

	// Getting rid of &, ( ,), [, ], ;, . as well as double quotes, = and -
	gen new_p_name0 = subinstr(p_name,"&","",.)
	replace new_p_name0 = subinstr(new_p_name0,"(","",.)
	replace new_p_name0 = subinstr(new_p_name0,")","",.)
	replace new_p_name0 = subinstr(new_p_name0,"[","",.)
	replace new_p_name0 = subinstr(new_p_name0,"]","",.)
	replace new_p_name0 = subinstr(new_p_name0,";","",.)
	replace new_p_name0 = subinstr(new_p_name0,".","",.)
	replace new_p_name0 = subinstr(new_p_name0,`"""',"",.)
	replace new_p_name0 = subinstr(new_p_name0,"=","",.)
	replace new_p_name0 = subinstr(new_p_name0,"-","",.)

	// no house numbers: keep only the leading run of letters and spaces, then trim
	gen temp = regexs(0) if regexm(new_p_name0,"^[ a-zA-Z]*")
	gen new_p_name=trim(temp)

	// lower case
	replace new_p_name = lower(new_p_name)

	// abbreviate "street", "place", "church", "yard" to "st", "pl", "ch", "yd"
	replace new_p_name = subinstr(new_p_name,"street","st",.)
	replace new_p_name = subinstr(new_p_name,"place","pl",.)
	replace new_p_name = subinstr(new_p_name,"church","ch",.)
	replace new_p_name = subinstr(new_p_name,"yard","yd",.)

	// one observation per parish/address pair (first occurrence in current order is kept)
	duplicates drop name2 new_p_name, force
end

* --- Section 1: MOLA original points ---------------------------------------
use "$MOLA/ll_pl_pa_points.dta", clear
mola_clean_points
drop if new_p_name==""
save "$MOLA/molaoriginal_unique_JAG", replace

/*========================================*/
// 2. Rearrange MOLA's modified addresses //
/*========================================*/

use "$MOLA/ll_point1881_soc_civ_near", clear
* The modified file may carry upper-case variable names; bring them in line with
* the original file (capture: the lower-case names may already be present)
cap rename P_NAME p_name
cap rename NAME name
rename OBJECTID_1 objectid_1

mola_clean_points

* Manual address for map point 148, applied before empty addresses are dropped.
* NOTE(review): this value is mixed case while every other new_p_name is lower
* case at this point -- confirm it is meant to match the census address as is.
replace new_p_name="Brewers Quay" if objectid_1==148
drop if new_p_name==""

save "$MOLA/molamodified_unique_JAG", replace

/*===========================================*/
// 3. Create location dictionaries from NAPP //
/*===========================================*/

* ---------------------------------------------------------------------------
* Enumeration-side dictionaries, all read from the raw NAPP extract.
* Each file holds the distinct codes at one geographic level together with the
* upper-cased value-label text (ST* variables) and a running id (idusing*)
* that the reclink calls further down use as idusing().
* Every -use- carries -clear- so the block also runs when the data in memory
* have unsaved changes (the county-level -use- previously lacked it).
* ---------------------------------------------------------------------------

*To create parish enumeration dictionary
use gb81a_paridgb gb81a_rgcntygb using "$NAPPRaw/napp_00002.dta", clear 
rename gb81a_paridgb PARIDGB 
rename gb81a_rgcntygb RGCNTYGB
duplicates drop PARIDGB RGCNTYGB, force
* decode turns the value labels into strings; upper() makes them comparable across files
decode PARIDGB , gen(STPARIDGB)
replace STPARIDGB=upper(STPARIDGB)
decode RGCNTYGB , gen(STRGCNTYGB)
replace STRGCNTYGB=upper(STRGCNTYGB)
gen idusing=_n
save "$NAPP/ParishesPARIDGB", replace

*To create subdistrict enumeration dictionary
use gb81a_subdidgb gb81a_rgcntygb using "$NAPPRaw/napp_00002.dta", clear
rename gb81a_subdidgb SUBDIDGB
rename gb81a_rgcntygb RGCNTYGB
duplicates drop SUBDIDGB RGCNTYGB, force
decode SUBDIDGB , gen(STSUBDIDGB)
replace STSUBDIDGB=upper(STSUBDIDGB)
decode RGCNTYGB , gen(STRGCNTYGB)
replace STRGCNTYGB=upper(STRGCNTYGB)
gen idusinga=_n
save "$NAPP/ParishesSUBDIDGB", replace

*To create district enumeration dictionary
use gb81a_rgdistgb gb81a_rgcntygb using "$NAPPRaw/napp_00002.dta", clear
rename gb81a_rgdistgb RGDISTGB
rename gb81a_rgcntygb RGCNTYGB
duplicates drop RGDISTGB RGCNTYGB, force
decode RGDISTGB , gen(STRGDISTGB)
replace STRGDISTGB=upper(STRGDISTGB)
decode RGCNTYGB , gen(STRGCNTYGB)
replace STRGCNTYGB=upper(STRGCNTYGB)
gen idusingb=_n
save "$NAPP/ParishesRGDISTGB", replace

*To create county enumeration dictionary
use gb81a_rgcntygb using "$NAPPRaw/napp_00002.dta", clear
rename gb81a_rgcntygb RGCNTYGB
duplicates drop RGCNTYGB, force
decode RGCNTYGB, gen(STRGCNTYGB)
replace STRGCNTYGB=upper(STRGCNTYGB)
gen idusingc=_n
save "$NAPP/ParishesRGCNTYGB", replace

* Birth-place dictionaries, built from the London NAPP file.
* ParishesBPSTPAGB: one row per (birth parish, birth county) pair, with the
*                   upper-cased county label and the running key idmaster.
* ParishesBPCCTYGB: one row per birth county, carrying the idmaster of the
*                   first parish row kept for that county.
use BPSTPAGB BPCCTYGB using "$NAPP/NAPP_London3V1", clear

* Codes 997, 998 and 999 are excluded (section 4 labels them at sea, foreign and unknown)
drop if inlist(BPCCTYGB, 997, 998, 999)

duplicates drop BPSTPAGB BPCCTYGB, force
decode BPCCTYGB, generate(STBPCCTYGB)
replace STBPCCTYGB = upper(STBPCCTYGB)

* idmaster is the row number after sorting on the birth-parish string
sort BPSTPAGB
generate idmaster = _n
save "$NAPP/ParishesBPSTPAGB", replace

* County-level version of the same file
duplicates drop BPCCTYGB, force
keep idmaster BPCCTYGB STBPCCTYGB
save "$NAPP/ParishesBPCCTYGB", replace

* ---------------------------------------------------------------------------
* County dictionary: fuzzy-link birth-county labels to enumeration-county labels.
* Matched enumeration codes are stored wide as RGCNTYGBequi1, RGCNTYGBequi2, ...
* (one column per match found for a given idmaster).
* NOTE(review): the original trailing remark on this call said that below .9
* there are not many good matches, yet the threshold used here is .5 -- confirm.
* ---------------------------------------------------------------------------
use "$NAPP/ParishesBPCCTYGB", clear
reclink STBPCCTYGB using "$NAPP/ParishesRGCNTYGB", ///
	idmaster(idmaster) idusing(idusingc) uvarlist(STRGCNTYGB) ///
	gen(clinkvar) _merge(mergeclink) minscore(.5)
keep if mergeclink==3
keep BPCCTYGB STBPCCTYGB RGCNTYGB idmaster
rename RGCNTYGB RGCNTYGBequi
bysort idmaster: generate i = _n
reshape wide RGCNTYGBequi, i(idmaster) j(i)
save "$NAPP/ParishDictionaryCounty.dta", replace

* ---------------------------------------------------------------------------
* Parish dictionary: fuzzy-link (birth parish, birth county) to
* (enumeration parish, enumeration county). Below .9 there are not many good
* matches, hence minscore(.9).
* ---------------------------------------------------------------------------
use "$NAPP/ParishesBPSTPAGB", clear
reclink BPSTPAGB STBPCCTYGB using "$NAPP/ParishesPARIDGB", ///
	idmaster(idmaster) idusing(idusing) uvarlist(STPARIDGB STRGCNTYGB) ///
	gen(clinkvar) _merge(mergeclink) minscore(.9)

* In the following dofile we manually identify the good matches
do "$CODE/ParishDictionaryReclinkCheck1V01.do"

keep if mergeclink==3
rename PARIDGB PARIDGBequi
keep PARIDGBequi idmaster
bysort idmaster: generate i = _n
reshape wide PARIDGBequi, i(idmaster) j(i)
save "$NAPP/ParishDictionaryBirthActual.dta", replace

* ---------------------------------------------------------------------------
* Fallback links for birth places: the same birth-place list is linked to
* subdistrict names, then to district names, then to the county dictionary.
* preserve / restore brings the birth-place list back after each attempt.
* ---------------------------------------------------------------------------
use "$NAPP/ParishesBPSTPAGB", clear

* --- attempt 1: subdistrict of enumeration ---
* The original line also carried a call to ParishDictionaryReclinkCheck2V01.do,
* but it sat inside the trailing comment and was therefore never executed.
preserve
	reclink BPSTPAGB STBPCCTYGB using "$NAPP/ParishesSUBDIDGB", ///
		idmaster(idmaster) idusing(idusinga) uvarlist(STSUBDIDGB STRGCNTYGB) ///
		gen(clinkvar) _merge(mergeclink) minscore(.9)
	do "$CODE/ParishDictionaryReclinkCheck1SubV01.do"
	keep if mergeclink==3
	count
	rename SUBDIDGB SUBDIDGBequi
	keep SUBDIDGBequi idmaster
	bysort idmaster: generate i = _n
	reshape wide SUBDIDGBequi, i(idmaster) j(i)
	save "$NAPP/ParishDictionaryBirthSubdist.dta", replace
restore

* --- attempt 2: district of enumeration ---
preserve
	reclink BPSTPAGB STBPCCTYGB using "$NAPP/ParishesRGDISTGB", ///
		idmaster(idmaster) idusing(idusingb) uvarlist(STRGDISTGB STRGCNTYGB) ///
		gen(clinkvar) _merge(mergeclink) minscore(.9)
	do "$CODE/ParishDictionaryReclinkCheck1DistV01.do"
	keep if mergeclink==3
	count
	rename RGDISTGB RGDISTGBequi
	keep RGDISTGBequi idmaster
	bysort idmaster: generate i = _n
	reshape wide RGDISTGBequi, i(idmaster) j(i)
	save "$NAPP/ParishDictionaryBirthDist.dta", replace
restore

* --- attempt 3: county, by exact merge on the upper-cased birth-county label ---
merge n:1 STBPCCTYGB using "$NAPP/ParishDictionaryCounty.dta"
count
keep if _merge==3
keep RGCNTYGBequi* idmaster
save "$NAPP/ParishDictionaryBirthCounty.dta", replace

* ---------------------------------------------------------------------------
* Assemble the final dictionary: birth-place list plus the equivalents found at
* parish, subdistrict, district and county level. Each merge stores its result
* code in its own link* variable instead of _merge.
* ---------------------------------------------------------------------------
use "$NAPP/ParishesBPSTPAGB", clear
merge 1:1 idmaster using "$NAPP/ParishDictionaryBirthActual.dta",  gen(linkparish)
merge 1:1 idmaster using "$NAPP/ParishDictionaryBirthSubdist.dta", gen(linksubdist)
merge 1:1 idmaster using "$NAPP/ParishDictionaryBirthDist.dta",    gen(linkdist)
merge 1:1 idmaster using "$NAPP/ParishDictionaryBirthCounty.dta",  gen(linkcoun)

* Key first, link indicators last
order link*, last
order idmaster, first
label variable idmaster "unique identifier BPCCTYGB BPSTPAGB"

* Value label for the merge codes kept in the link* variables
label define link 1 "No link" 3 "Link found"
label values link* link

save "$NAPP/DictionaryParSubDisCouNEW.dta", replace

/*===========================================*/
// 4. Clean the place of birth in NAPP 		 //
/*===========================================*/

* Load the London census file and attach the birth-place dictionary, which adds
* the *equi* columns (enumeration codes equivalent to each birth place).
use "$NAPP/NAPP_London3V1", clear

merge n:1 BPCCTYGB BPSTPAGB using "$NAPP/DictionaryParSubDisCouNEW.dta"

*To create the migrant variable based on our dictionary
*tab NATIVITY 
* nativity starts at 1 (UK) and is overwritten for the special birth-county codes
gen nativity = 1 
replace nativity = 2 if BPCCTYGB==095
replace nativity = 3 if BPCCTYGB==998 
replace nativity = 8 if BPCCTYGB==997
replace nativity = 9 if BPCCTYGB==999
label variable nativity "Nativity"
label def nat 1 "UK" 2 "Ireland" 3 "Foreign" 8 "At sea" 9 "Unknown"
label val nativity nat

* migrantd is filled from the finest geography outwards; each later step only
* touches observations that are still missing.
gen migrantd = . 
* 1: enumerated in a parish, subdistrict or district equivalent to the birth place
replace migrantd = 1 if nativity==1 & ( ///
	  PARIDGB==PARIDGBequi1 | PARIDGB==PARIDGBequi2 | PARIDGB==PARIDGBequi3 ///
	| PARIDGB==PARIDGBequi4 | PARIDGB==PARIDGBequi5 | PARIDGB==PARIDGBequi6 ///
	| SUBDIDGB==SUBDIDGBequi1 | SUBDIDGB==SUBDIDGBequi2 | SUBDIDGB==SUBDIDGBequi3 ///
	| RGDISTGB==RGDISTGBequi1 | RGDISTGB==RGDISTGBequi2)
* 2: enumerated in a county equivalent to the birth county
replace migrantd = 2 if nativity==1 &  migrantd==. & (RGCNTYGB==RGCNTYGBequi1 | RGCNTYGB==RGCNTYGBequi2)
replace migrantd = 3 if nativity==1 & migrantd==. & (RGCNTYGBequi1~=. | BPCCTYGB==92)		//other not known parish in england
replace migrantd = 4 if nativity==1 & migrantd==.						//by def this were born in scotland or in wales
replace migrantd = 5 if nativity==2 | nativity==3 | nativity==8
replace migrantd = 6 if nativity==9

* The variable is referred to by its full name: the previous abbreviation
* -migrant- only worked through variable-name abbreviation and would fail with
* varabbrev off or if another variable starting with migrant existed.
label variable migrantd "Migrant"

label def mi 1 "Eng - in parish/sub/district of birth"  2 "Eng - in diff but same county of birth" ///
3 "Eng - in diff parish/sub/district/county of birth " 4 "Sco/Wal"  ///
5 "Foreign-born" 6 "Unclassifiable due to unknown birthplace"
label val migrantd mi 

// Strip punctuation from the census address:
// ampersand, round and square brackets, semicolon, full stop, double quote,
// slash, apostrophe and question mark
generate new_address0 = subinstr(ADDRESS, "&", "", .)
replace  new_address0 = subinstr(new_address0, "(", "", .)
replace  new_address0 = subinstr(new_address0, ")", "", .)
replace  new_address0 = subinstr(new_address0, "[", "", .)
replace  new_address0 = subinstr(new_address0, "]", "", .)
replace  new_address0 = subinstr(new_address0, ";", "", .)
replace  new_address0 = subinstr(new_address0, ".", "", .)
replace  new_address0 = subinstr(new_address0, `"""', "", .)
replace  new_address0 = subinstr(new_address0, "/", "", .)
replace  new_address0 = subinstr(new_address0, "'", "", .)
replace  new_address0 = subinstr(new_address0, "?", "", .)

// Remove house numbers.
// new_address1 = run of letters and spaces at the end of the string,
// new_address2 = run of letters and spaces at the start of the string.
// When the two differ, the address becomes their concatenation (end part first),
// provided at least one of them is non-empty.
generate new_address1 = regexs(0) if regexm(new_address0, "[ a-zA-Z]*$")
generate new_address2 = regexs(0) if regexm(new_address0, "^[ a-zA-Z]*")
generate flag = (new_address1 == new_address2)
egen new_address = concat(new_address1 new_address2) if !flag, punct(" ")
replace new_address0 = new_address if !flag & (new_address1 != "" | new_address2 != "")
drop new_address new_address1 new_address2 flag

// Remaining symbols: plus, hyphen, equals
replace new_address0 = subinstr(new_address0, "+", "", .)
replace new_address0 = subinstr(new_address0, "-", "", .)
replace new_address0 = subinstr(new_address0, "=", "", .)

// Working copy (temp) and trimmed, lower-case address (new_address)
generate temp = new_address0
generate new_address = trim(temp)
replace new_address = lower(new_address)

// Abbreviate "street", "place", "church", "yard" to "st", "pl", "ch", "yd".
// Square and road are deliberately left alone so that the next two dofiles have an effect.
replace new_address = subinstr(new_address, "street", "st", .)
replace new_address = subinstr(new_address, "place",  "pl", .)
replace new_address = subinstr(new_address, "church", "ch", .)
replace new_address = subinstr(new_address, "yard",   "yd", .)

* The following two dofiles were created after running the same dofile without reclink, see S:\NetworkParish\Data\census1881\NAPP1881_address files and
* most importantly S:\NetworkParish\Data\census1881\NAPP1881_address\Merging_missingaddressesRclink
do "$CODE/addressnames1to86Rclink.do"
do "$CODE/addressnames87to173Rclink.do"

* Parish-name fixes on the census side.
* ST PANCRAS SOPER LANE and ST MARY LE BOW are the same parish, and the maps only use ST MARY LE BOW.
replace name = "ST DUNSTAN STEPNEY/MILE END" if name == "MILE END NEW TOWN"
replace name = "ST MARY LE BOW" if name == "ST PANCRAS SOPER LANE"

* Parish code 118 is folded into 117; empty addresses and the listed parish codes are removed
replace PARIDGB = 117 if PARIDGB == 118
drop if new_address == ""
keep if PARIDGB < 253 & !inlist(PARIDGB, 5, 7, 26, 33, 35, 36, 241, 243, 244, 245, 246)

save "$NAPP/NAPP_London4V1_JAG", replace


/*===========================================*/
// 5. Merge MOLA with NAPP census   		 //
/*===========================================*/

/*
use "$NAPP/NAPP_London4V1_JAG",clear

levelsof name, local(parishes)
local i=1
matrix match=.
matrix missi=.

foreach p of local parishes{

di "`i',`p'"
use if name2=="`p'" using "$MOLA/molaoriginal_unique_JAG", clear
gen namepar=name2
gen nameadd=new_p_name
save "$MOLA/molaoriginal_unique`i'_JAG", replace

use if name=="`p'" using "$NAPP/NAPP_London4V1_JAG", clear
mmerge name new_address using "$MOLA/molaoriginal_unique`i'_JAG", type(n:1) umatch(name2 new_p_name)
label data "Parish `p'"

cou 
local t`i'=r(N)
cap cou if _merge~=-1
local nm`i'=r(N)
cap drop if _merge==-1
cap cou if _merge==3
local matchr`i'=r(N)/`nm`i''
matrix match=match \ `matchr`i''
local missir`i'= (`t`i''-`nm`i'')/`t`i''
matrix missi=missi \ `missir`i''

cap gen x_1 = _merge==-1
cap gen x1 = _merge==1
cap gen x2 = _merge==2
cap gen x3 = _merge==3
gen y = 1

egen miss_master2 = sum(x_1)
egen only_master2 = sum(x1)
egen only_using2 = sum(x2)
egen match2 = sum(x3)
egen total2 = sum(y)

save "$NAPP/MolaOriginal_NAPP`i'_JAG",replace

duplicates drop name new_address _merge, force
sort name new_address

save "$NAPP/MolaOriginal_NAPP_unique`i'_JAG",replace
local i=`i'+1
}
local j=`i'-1
use "$NAPP/MolaOriginal_NAPP1_JAG", clear

forval x=2/`j' {
append using "$NAPP/MolaOriginal_NAPP`x'_JAG"
}

label data "All Parishes"
save "$NAPP/MolaOriginal_NAPP_JAG",replace

matrix matchmissi=match,missi
svmat matchmissi, names(var)
label var var1 matched
label var var2 missing
twoway (histogram var1) (scatter var2 var1)

cap log close
*/


* Load the cleaned census file (same parish filter as when it was saved) and
* collect the distinct parish names; the loop below handles one parish at a time.
use if PARIDGB<253 & PARIDGB~=5 & PARIDGB~=7 & PARIDGB~=26 & PARIDGB~=33 & PARIDGB~=35 & PARIDGB~=36 & PARIDGB~=241 & PARIDGB~=243 & PARIDGB~=244 & PARIDGB~=245 & PARIDGB~=246 ///
using "$NAPP/NAPP_London4V1_JAG",clear

drop if new_address==""

levelsof name, local(parishes)

* Fix the random-number state once, before the loop: the loop breaks ties among
* duplicate reclink matches with runiform(), and without a seed every run of
* this do-file could keep a different match. The seed value itself is arbitrary.
* Files produced by earlier, unseeded runs cannot be reproduced draw for draw.
set seed 18811116

* i      : running parish counter, used in the names of the per-parish files
* match  : column of per-parish match rates (first row is a missing placeholder)
* missi  : column of per-parish shares with _merge==-1 (same placeholder row)
local i=1
matrix match=.
matrix missi=.

* Per-parish merge of census addresses with MOLA modified points.
* For each parish name in `parishes':
*   1. exact merge (mmerge) of census rows with the MOLA points of that parish,
*   2. fuzzy merge (reclink) for census rows left unmatched by step 1,
*   3. recombine, compute match statistics, save per-parish files.
* The counter `i' indexes the files written for each parish.
foreach p of local parishes {

	di "`i',`p'"

	* MOLA points of this parish, saved twice: a permanent per-parish file used
	* by mmerge, and a tempfile that additionally carries id_using for reclink
	use if name2=="`p'" using "$MOLA/molamodified_unique_JAG", clear
	gen namepar=name2
	gen nameadd=new_p_name
	save "$MOLA/molamodified_unique`i'_JAG", replace
	*The following lines are defined to be able to use rclink below
	gen id_using=_n
	sort new_p_name
	tempfile temp`i'
	save `temp`i''

	// 1. mmerge 
	* Exact match on parish name and cleaned address; MOLA-only rows are discarded
	use if name=="`p'" using "$NAPP/NAPP_London4V1_JAG", clear
	mmerge name new_address using "$MOLA/molamodified_unique`i'_JAG", type(n:1) umatch(name2 new_p_name)
	drop if _merge==2
	tab _merge

	***NEW, to determine which can be merged between MOLA and census see br new_address new_p_name clinkvar if mergeclink==3
	di "`i',`p' Before reclink, Total obs:"
	cou

	// 2. reclink for observation _merge==1 (in NAPP not in MOLA)
	* preserve keeps the full mmerge result; it is restored in step 3
	preserve
	keep if _merge==1
	*drop OBJECTID_1 OBJECTID_2 OBJECTID ROCQUE_ID p_name parish ORIG_FID Map1871 _ID new_p_name0 temp
	**XXX those are variables that are in the modified version of the maps after editing them in this dofile, if we update the map we need to check they correspond to this 
	* MOLA-side variables are removed so that reclink can bring them in again from the tempfile
	local drops objectid_1 ROCQUE_ID p_name parish Map1871 Buffer100 Buffer80 Buffer60 Buffer50 Buffer40 Buffer30 longitude latitude social civil dist_civilborder ID_socialborder dist_socialborder IDSocial_L IDSocial_R id border extremeborder name2 new_p_name0 temp new_p_name

	* Drop each listed variable only if it is present
	foreach vvv of local drops{
		capture confirm variable `vvv'
		if !_rc {
			drop `vvv'
		}
	}

	gen id_master=_n
	sort name new_address

	save "$NAPP/MolaModified_NAPP`i'temp_JAG", replace

	* Fuzzy match of the census address against the MOLA address, minimum score 0.95
	reclink new_address using `temp`i'', idmaster(id_master) idusing(id_using) gen(clinkvar) uvarlist(new_p_name) _merge(mergeclink) minscore(0.95)
	tab mergeclink
	save "$NAPP/MolaModified_NAPP`i'reclink_JAG", replace
	cou
	
	* r(N) is the count just above: only de-duplicate when reclink returned rows
	if r(N)>0 {
		*Solve duplicates problem
		// Keep best match
		* z > 0 flags persons (serial pernum) that appear more than once; k is their best score
		duplicates tag serial pernum, gen(z)
		bys serial pernum: egen k = max(clinkvar)
		drop if z!=0 & clinkvar<k
		// Randomly choose one to keep in sample of duplicates
		* f marks the row with the smallest random draw within each serial; flagged rows
		* whose (serial, new_p_name) group contains no marked row are dropped.
		* NOTE(review): the draws depend on the random-number state when this line runs
		bys serial: gen random = runiform() if z!=0
		bys serial (random): gen f = _n==1
		bys serial new_p_name : egen f2 = sum(f)
		drop if z!=0 & f2==0
		//Delete all of these cases
		//drop if z!=0
		save "$NAPP/MolaModified_NAPP`i'reclink_JAG", replace
	}
	else {
		di "0"
	}

	// 3. Append mmerge + reclink 
	* Back to the mmerge result: its unmatched census rows are replaced by the reclink output
	restore
	drop if _merge==1
	append using "$NAPP/MolaModified_NAPP`i'reclink_JAG"

	di "`i',`p' After reclink, Total obs:"
	cou
	* Rows coming from the reclink file are coded 1, then 3 when reclink found a match
	replace _merge=1 if mergeclink~=.
	replace _merge=3 if mergeclink==3
	*local i=`i'+1
	***
	label data "Parish `p'"
	* t  = all rows; nm = rows with _merge other than -1; the match rate is
	* matched rows over nm, the missing rate is (t - nm) over t
	cou 
	local t`i'=r(N)
	cap cou if _merge~=-1
	local nm`i'=r(N)
	cap drop if _merge==-1
	cap cou if _merge==3
	local matchr`i'=r(N)/`nm`i''
	matrix match=match \ `matchr`i''
	local missir`i'= (`t`i''-`nm`i'')/`t`i''
	matrix missi=missi \ `missir`i''

	* Indicator for each _merge code, plus a constant used for the total
	cap gen x_1 = _merge==-1
	cap gen x1 = _merge==1
	cap gen x2 = _merge==2
	cap gen x3 = _merge==3
	gen y = 1

	* Parish-level totals of the indicators, stored on every row
	egen miss_master2 = sum(x_1)
	egen only_master2 = sum(x1)
	egen only_using2 = sum(x2)
	egen match2 = sum(x3)
	egen total2 = sum(y)

	save "$NAPP/MolaModified_NAPP`i'_JAG",replace

	* Address-level version: one row per parish, address and merge code
	duplicates drop name new_address _merge, force
	sort name new_address
	save "$NAPP/MolaModified_NAPP_unique`i'_JAG",replace

	local i=`i'+1
}

* ---------------------------------------------------------------------------
* Stack the per-parish files written by the loop. After the loop `i' is one
* past the last parish, so `j' is the index of the last file.
* ---------------------------------------------------------------------------
local j = `i' - 1

* Person-level files
use "$NAPP/MolaModified_NAPP1_JAG", clear
forvalues x = 2/`j' {
	append using "$NAPP/MolaModified_NAPP`x'_JAG"
}
label data "All Parishes"
save "$NAPP/MolaModified_NAPP_JAG2", replace

* Address-level (unique) files
use "$NAPP/MolaModified_NAPP_unique1_JAG", clear
forvalues x = 2/`j' {
	append using "$NAPP/MolaModified_NAPP_unique`x'_JAG"
}
label data "All Parishes"
save "$NAPP/MolaModified_NAPP_unique_JAG2", replace

/*=======================*/
// 6. Clean merged data  //
/*=======================*/

use "$NAPP/MolaModified_NAPP_JAG2", clear

* border and extremeborder came from the MOLA side of the merge; they are
* rebuilt here from the census parish name, converted to proper case so that
* the same list of names can be used.
generate nameproper = proper(name)
drop border extremeborder

* inlist() accepts at most nine string candidates per call, hence three calls
generate border = inlist(nameproper, "Battersea", "Bow", "Bromley St Leonard", "Brompton", "Camberwell", ///
		"St Dunstan Stepney/Mile End", "Mile End New Town", "Poplar", "St George Hanover Square") ///
	| inlist(nameproper, "St James Clerkenwell", "St Leonard Shoreditch", "St Luke Chelsea", "St Luke Old Street", ///
		"St Margaret Westminster", "St Mary Abbots Kensington", "St Mary Lambeth", "St Mary Paddington", "St Mary Rotherhithe") ///
	| inlist(nameproper, "St Marylebone", "St Matthew Bethnal Green", "St Nicholas Deptford", "St Pancras", ///
		"St Paul Deptford", "Greenwich", "St Anne Kensington", "Brompton", "Islington")

* extremeborder = border, switched off for five of the border parishes
generate extremeborder = border
replace extremeborder = 0 if inlist(nameproper, "St George Hanover Square", "St Luke Old Street", ///
	"St Dunstan Stepney/Mile End", "Mile End New Town", "St Marylebone")

// identify matched observation
* reclink matches count as merged; MOLA-only rows are removed; the code is kept in mergeoriginal
replace _merge = 3 if mergeclink==3 & _merge==1
drop if _merge==2
generate mergeoriginal = _merge

* Empty marker variable: it only serves as the anchor for the -order- command on the next line
gen ____TECHNICAL____ = . 
order sample serial HHNUM HGB pernum perwt PERNUM RECIDGB, after(____TECHNICAL____)
*To split households based on heads, we have 628511 heads and HHNUM unique=610781. 
*Among them, 1916 are from HH servants (we would like to get rid of them | RELAGB==6010 | RELAGB==6011 | RELAGB==6012)
* hhhead = 1 for the RELAGB codes listed below, 0 otherwise
gen hhhead=(RELAGB==10 | RELAGB==11 | RELAGB==12 | RELAGB==2010 | RELAGB==3010 | RELAGB==4010 | RELAGB==5010 )
sort HHNUM PERNUM
* HHNUMhead is a running counter over the sorted rows: the first row gets 1 and the counter
* increases by one on every later row with hhhead==1, so each head row opens a new id
gen HHNUMhead=1 if _n==1
replace HHNUMhead=hhhead[_n]+HHNUMhead[_n-1] if _n>1
sort HHNUMhead PERNUM
* PERNUMhead: sequence number of the row within its HHNUMhead group
by HHNUMhead: gen PERNUMhead=_n
*We would like to eliminate the observations below: they live where they work as well, or we don't know their relation to the head of household
* NOTE(review): a missing RELAGB also satisfies RELAGB>=6000 in Stata, so such rows are flagged too - confirm this is intended
gen HHservantunknown=(RELAGB>=6000) //see line 1225
* Empty marker variable used as the anchor for the geography block of columns
gen ____GEOGRAPHY____ = .
order CNTRYGB RGCNTYGB RGDISTGB SUBDIDGB PARIDGB name parish name2 ADDRESS new_address new_p_name social civil longitude latitude objectid_1 border extremeborder, after(____GEOGRAPHY____)
order PARTYPGB SUBTYPGB PARPOP, after(civil)
/*
MOLA: parish(original) name2 (final)
NAPP: name(string) == PARIDGB(numerical)
mola: new_p_name
NAPP: new_address
*/
* Variable labels recording which source (mola or napp) each name/address field comes from
label variable parish "mola(original)"
label variable name2 "mola(final)"
label variable new_p_name "mola(address)"
label variable new_address "napp(address)"
label variable objectid_1 "Map points id"

* Drop variables that the rest of this script does not use
drop new_address0 temp new_p_name0 
drop id_master clinkvar id_using mergeclink x_1 x1 x2 x3 y total2 match2 only_using2 only_master2 miss_master2

* Empty marker variable used as the anchor for the household-level columns
gen ____HOUSEHOLD____ = .
order HEDINFGB HEADLOC HEADAGE HEDMARST HEADOCC THCFU HAMLASLT CFU TFCFU CFUSIZE FATHERGB MOTHERGB UNMARDAU UNMARSON MARRYDAU MARRYSON RELATS INMATEGB SERVANGB UNMARKID NONRELS VISITGB, after(____HOUSEHOLD____)

* Empty marker variable used as the anchor for the individual-level columns
gen ____INDIVIDUAL____ = .
order NAMELAST NAMEFRST SEX SEXINFGB AGE AGEINFGB MARST MARINFGB RELAGB RELINFGB OCCSTRNG OCC81GB INACTVGB RETIRDGB ORDERGB OCCINFGB INSTITGB BPSTPAGB BPCCTYGB DISAB SPORIGNL, after(____INDIVIDUAL____)

* nameproper was only needed to build the border flags; the other variables are not used again below
drop PIECEGB FOLIOGB PAGEGB p_name id namepar nameadd z k random f f2 nameproper

* headchild = 1 when RELAGB is one of the codes listed below, 0 otherwise
* (codes 2035 and 2036 are deliberately left out, as in the earlier version of this line)
gen headchild = inlist(RELAGB, 30, 31, 32, 33, 34, 37, 38, ///
	2030, 2031, 2032, ///
	3030, 3031, 3032, 3033, 3034, 3037, 3038, ///
	4030, 4031, 4032, 4033, 4034, 4037, 4038, ///
	5030, 5031, 5032, ///
	6030, 6031, 6032, 6033, 6034)
* The flag only applies where INSTITGB is 1 or 5; everywhere else it is reset to 0
replace headchild = 0 if !inlist(INSTITGB, 1, 5)
* Number of flagged rows within each head-based household
bys HHNUMhead: egen tot_child = sum(headchild)

* inactive = 1 when INACTVGB lies outside [7000, 8000); set to missing when INACTVGB is 9999 or missing, or AGE<15
gen inactive=(INACTVGB<7000 | INACTVGB>=8000)
replace inactive=. if INACTVGB==9999 | INACTVGB==. 
replace inactive=. if AGE<15 
label var inactive "Created: Inactive"

* retired = 1 for inactive rows with INACTVGB>=8000, plus any row still at 0 whose RETIRDGB==1;
* missing wherever inactive is missing or AGE<15
gen retired=(inactive==1 & INACTVGB>=8000)
replace retired=. if inactive==.
replace retired=. if AGE<15 
replace retired=1 if retired==0 & RETIRDGB==1
label var retired "Created: Retired"

* occupied = 1 only when both inactive and retired are 0; every other case is set to missing here
gen occupied=(inactive==0 & retired==0)
replace occupied=. if AGE<15 
replace occupied=. if  inactive==. | inactive==1
replace occupied=. if  retired==. | retired==1
label var occupied "Created: occupied"

* unemployed = 1 for inactive, non-retired rows aged 15+ with INACTVGB==3700 and OCC81GB other than 414;
* 0 for occupied rows; missing otherwise
gen unemployed=1 if inactive==1 & retired==0 & (INACTVGB==3700 & AGE>=15  ) & OCC81GB~=414
replace unemployed=0 if occupied==1
label var unemployed "Created: unemployed"

* Rows flagged unemployed had occupied missing (they are inactive); they become occupied==0 here
replace occupied=0 if unemployed==1

*those that say unemployed or any variation
* unemployednew = 1 when OCCSTRNG contains at least one of the fragments below, 0 otherwise
gen unemployednew = 0
foreach frag in "UNEMP" "UNEM" "UNEP" "UNENP" "EMPLOYMENT" ///
	"OUT OF E" "OUT OF B" "OUT OF W" "OUT OF O" "OUT OF PR" "OUT OF SE" {
	replace unemployednew = 1 if strpos(OCCSTRNG, "`frag'") > 0
}
*Population economically active
* peanew starts as 1 for SEX==1, AGE>=15 rows whose INACTVGB is in 7000-7413 or equals 9999
gen peanew=(SEX==1 & AGE>=15 & ( (INACTVGB>=7000 & INACTVGB<=7413) | (INACTVGB==9999))) //(INACTVGB>=3100 & INACTVGB<=3400) Eliminate from PEA those living from land, annuitant, pensioner, baron, noble
* Undefined (missing) outside the SEX==1, AGE>=15 universe
* NOTE(review): a missing AGE satisfies AGE>=15 in Stata, so such rows stay inside the universe - confirm
replace peanew=. if ~(SEX==1 & AGE>=15)
* Undefined for rows with RETIRDGB==1
replace peanew=. if peanew~=. & RETIRDGB==1
* Rows at 0 whose occupation string matched an "unemployed" fragment are counted in, unless the string
* contains "(ILL" or "INV", or INACTVGB is 3770 or lies in 3100-3400
replace peanew=1 if peanew==0 & unemployednew==1 & ((strpos(OCCSTRNG,"(ILL")==0) & (strpos(OCCSTRNG,"INV")==0) & INACTVGB~=3770 ) & (INACTVGB<3100 | INACTVGB>3400)
* Undefined when both INACTVGB (9999) and OCC81GB (999) carry those codes
replace peanew=. if peanew~=. & INACTVGB==9999 & OCC81GB==999

*Population employed or unemployed from those pea
* employednew: 1 = in peanew and not flagged unemployednew, 0 = in peanew and flagged, missing = not in peanew
gen employednew=(peanew==1 & unemployednew==0)
replace employednew=. if peanew~=1

* Move the idmaster-migrantd range of columns to the end, then save this stage
order idmaster-migrantd, last
save "$NAPP/MolaModified_NAPP_JAG3new",replace

* The idmaster-linkcoun columns are not carried into the later stages
drop idmaster-linkcoun

* nativity
* Birthplace group taken from BPCCTYGB; any code not listed (including missing) falls in group 1
gen nativity = cond(BPCCTYGB==999, 9, ///
               cond(BPCCTYGB==997, 8, ///
               cond(BPCCTYGB==998, 3, ///
               cond(BPCCTYGB==95,  2, 1))))
label define nat 1 "UK" 2 "Ireland" 3 "Foreign" 8 "At sea" 9 "Unknown"
label values nativity nat
label variable nativity "created: Nativity"

* migrant
* The original sequence could not run through: -order migrant- needs the variable to exist,
* while the -gen migrant- that followed needs it NOT to exist.
* Resolution: keep the classification that arrives as migrantd (it is still in the data after
* -order idmaster-migrantd, last- / -drop idmaster-linkcoun-). Its 1-6 coding is what label "mi"
* and the later tests (migrant<4, migrant<=1, migrant<=2, migrant==6) expect.
cap rename migrantd migrant

* Only when no migrant variable is available, build the crude name-vs-birthplace version
cap confirm variable migrant, exact
if _rc {
	* NOTE(review): this fallback writes codes 1/2/3/9, which do NOT line up with the 1-6
	* categories of label "mi" defined below - recode before relying on it
	gen migrant = .
	replace migrant = 1 if name==BPSTPAGB & nativity==1
	replace migrant = 2 if name!=BPSTPAGB & nativity==1
	replace migrant = 3 if nativity==2 | nativity==3 | nativity==8
	replace migrant = 9 if nativity==9
}

order migrant, after(nativity)
label variable migrant "created: Migrant"

label def mi 1 "Eng - in parish/sub/district of birth"  2 "Eng - in diff but same county of birth" 3 "Eng - in diff parish/sub/district/county" 4 "Sco/Wal" 5 "Foreign-born" 6 "Unclassifiable due to unknown birthplace"
label val migrant mi 

* labforce
* Priority of the rules (highest first):
*   9 = INACTVGB is 9999 or missing
*   2 = AGE<15
*   1 = INACTVGB in [7000, 8000)
*   2 = everything else
gen labforce = cond(INACTVGB==9999 | INACTVGB==., 9, ///
               cond(AGE<15, 2, ///
               cond(INACTVGB>=7000 & INACTVGB<8000, 1, 2)))
label variable labforce "created: labor force"

label define lab 1 "Yes, in labor force" 2 "No, not in labor force" 9 "Unknown"
label values labforce lab

* Categories of occupation
* category groups the OCC81GB codes into 20 groups; 0 is kept for any code (or missing value)
* that none of the ranges below covers. The group names are in the value label "cat" at the end.
gen category = 0					
replace category = 1 if OCC81GB>=1 & OCC81GB<=16 	
replace category = 2 if OCC81GB>16 & OCC81GB<=23 	
replace category = 3 if OCC81GB>23 & OCC81GB<=26 	
replace category = 4 if OCC81GB>26 & OCC81GB<=31 	
replace category = 5 if OCC81GB>31 & OCC81GB<=34 	
replace category = 6 if (OCC81GB>34 & OCC81GB<=36) | (OCC81GB>=43 & OCC81GB<=52)	
replace category = 7 if OCC81GB>36 & OCC81GB<=42 	
replace category = 8 if OCC81GB==53 			
replace category = 9 if OCC81GB>53 & OCC81GB<=63  	
replace category = 10 if (OCC81GB>63 & OCC81GB<=65) | (OCC81GB>=410 & OCC81GB<=412)	
replace category = 11 if (OCC81GB>65 & OCC81GB<=77) | (OCC81GB>=399 & OCC81GB<=402) | OCC81GB==413 	
replace category = 12 if OCC81GB>77 & OCC81GB<=99	
* & binds tighter than |, so this reads (100-112) or 404
replace category = 13 if OCC81GB>99 & OCC81GB<=112 | OCC81GB==404	
replace category = 14 if OCC81GB>112 & OCC81GB<=122	
replace category = 15 if OCC81GB>122 & OCC81GB<=132	
replace category = 16 if (OCC81GB>132 & OCC81GB<=166) | (OCC81GB>186 & OCC81GB<=197) | OCC81GB==405 | OCC81GB==408 | OCC81GB==409 | OCC81GB==407
replace category = 17 if (OCC81GB>166 & OCC81GB<=186) | (OCC81GB>197 & OCC81GB<=210) | OCC81GB==403	
replace category = 18 if OCC81GB>210 & OCC81GB<=238 	
replace category = 19 if (OCC81GB>238 & OCC81GB<=398) | OCC81GB==406 	
replace category = 20 if OCC81GB==414			

label variable category "created: Occupation grouping"
label def cat 1 "Civil service" 2 "Clerical" 3 "Legal professions" 4 "Medical professions" 5 "Education" 6 "Liberal arts" 7 "Scientists" 8 "Sports" 9 "Domestics" 10 "Services" 11 "Sales" 12 "Transport and communication" 13 "Agriculture" 14 "Animals" 15 "Media" 16 "Technical" 17 "Builder" 18 "Food dealer" 19 "Artisan" 20 "Rent, rank (no specific occupation)" 0 "Missing or unknown meaning"
label values category cat


* Class of occupations (larger than category)
* CLASSGB collapses ORDERGB into six classes (names in value label "class" below);
* rows whose ORDERGB is outside 1-24, or missing, stay at 0
gen CLASSGB=(ORDERGB>=1 & ORDERGB<4)
replace CLASSGB=2 if ORDERGB==4
replace CLASSGB=3 if ORDERGB>4 & ORDERGB<7
replace CLASSGB=4 if ORDERGB>6 & ORDERGB<9
replace CLASSGB=5 if ORDERGB>8 & ORDERGB<24
replace CLASSGB=6 if ORDERGB==24
label var CLASSGB "created: Class of ocupation"
* The value label is attached first and defined on the next line
label val CLASSGB class
label def class 6 "Unoccupied" 5 "Industrial" 4 "Agricultural" 3 "Commercial" 2 "Domestic" 1 "Professional"
**Eliminate Agricultural Occupations
drop if CLASSGB==4

*Eliminate obs that are employed but occ is blank or NIU or of unknown meaning
drop if employednew==1 & ( OCC81GB==999 | OCC81GB==998)
*Move to unemployed those with no specific occupation that are not inactive
replace employednew=0 if employednew~=. & category==20

* classnew: CLASSGB restricted to rows with a defined employednew, with 0 for employednew==0 and
* class 5 split by category into temporary codes 6-9, which the recode then shifts down to 4-7
* (class 4 was dropped above, so code 4 is free)
* NOTE(review): a row with CLASSGB==6 and employednew==1 also holds classnew==6 when the recode runs
* and would therefore end up as 4 ("Ind-Artisan") - confirm no such rows exist or handle them first
gen classnew=CLASSGB
replace classnew=. if employednew==.
replace classnew=0 if employednew==0
replace classnew=6 if classnew==5 & category==19
replace classnew=7 if classnew==5 & category==17
replace classnew=8 if classnew==5 & (category==18 | category==13)
replace classnew=9 if classnew==5 
recode classnew (6 = 4 ) (7 = 5) (8=6) (9=7)
label val classnew classnew
label def classnew 0 "Unemployed" 1 "Professional" 2 "Domestic" 3 "Commercial" 4 "Ind-Artisan" 5 "Ind-Builder" 6 "Ind-Food" 7 "Ind-Services"

* One dummy per CLASSGB level: class# = 1 when CLASSGB equals #, 0 otherwise,
* and missing for rows that are not in the labour force (labforce~=1).
levelsof CLASSGB, local(Class)
foreach o of local Class {
	di `o'
	* First 5 characters of the CLASSGB value label for this level (the number itself if unlabelled)
	local labelclass`o': label (CLASSGB) `o' 5
	gen class`o'=(CLASSGB==`o')
	*replace class`o'=. if occupied==.
	replace class`o'=. if labforce~=1
	label variable class`o' "`labelclass`o''"
	* Fix: the text for value 1 previously used the macro label`o', which is only filled by the
	* ORDERGB loop further down, so the value label was empty here. Use this loop's own macro.
	label def class`o' 0 "other" 1 "`labelclass`o''"
	label val class`o' class`o'
}
rename CLASSGB classgb

* One dummy per ORDERGB level: occupation# = 1 when ORDERGB equals #, 0 otherwise,
* missing for rows with labforce~=1
levelsof ORDERGB, local(Occupation)
foreach o of local Occupation {
di `o'
* label`o' = first 5 characters of the ORDERGB value label for this level
local label`o': label (ORDERGB) `o' 5
gen occupation`o'=(ORDERGB==`o')
replace occupation`o'=. if labforce~=1
label variable occupation`o' "`label`o''"
* The value label is attached first and defined on the next line
label val occupation`o' occupation`o'
label def occupation`o' 0 "other" 1 "`label`o''"
}
* Rows outside the labour force (labforce==2) are assigned to occupation24
* (this requires 24 to be one of the ORDERGB levels present in the data)
replace occupation24=1 if labforce==2

* One dummy per classnew level: classnew# = 1 when classnew equals #, 0 otherwise,
* missing for rows with peanew~=1. The macro Class is overwritten with the classnew levels here.
levelsof classnew, local(Class)
foreach o of local Class {
di `o'
* labelclassnew`o' = first 6 characters of the classnew value label for this level
local labelclassnew`o': label (classnew) `o' 6
gen classnew`o'=(classnew==`o')
replace classnew`o'=. if peanew~=1
label variable classnew`o' "`labelclassnew`o''"
* The value label is attached first and defined on the next line
label val classnew`o' classnew`o'
label def classnew`o' 0 "other" 1 "`labelclassnew`o''"
}

* Same ORDERGB dummies as occupation#, but defined on the employednew universe:
* occupationnew# is missing for rows with employednew~=1
levelsof ORDERGB, local(Occupation)
foreach o of local Occupation {
di `o'
local label`o': label (ORDERGB) `o' 5
gen occupationnew`o'=(ORDERGB==`o')
*replace occupation`o'=. if occupied~=1
replace occupationnew`o'=. if employednew~=1
label variable occupationnew`o' "`label`o''"
* Reuses the value label occupation# that was defined for the occupation# dummies
label val occupationnew`o' occupation`o'
}
* Rows with employednew==0 are assigned to occupationnew24
replace occupationnew24=1 if employednew==0

* Keep only merged data
keep if _merge==3

* Number of social parishes by civil parish
* s flags the first row of every civil x social combination; summing s within civil
* gives the number of distinct social values in that civil parish
bys civil social: gen s = _n==1
bys civil: egen social_c = sum(s)
su social_c

* Total number of social, civil parishes and poor law districts
* ss / cc flag the first row of each social / civil value; their dataset-wide sums are the
* numbers of distinct social and civil values (the same constant is stored on every row)
bys social: gen ss = _n==1
egen tot_social = sum(ss)
bys civil: gen cc = _n==1
egen tot_civil = sum(cc)

* Spouse variables
* positionspouse = SPORIGNL, left missing when SPORIGNL is 0 or 9999
gen positionspouse=SPORIGNL if SPORIGNL~=0 & SPORIGNL~=9999
gen OCC81spouse=.
gen labforcespouse=.
* For every distinct spouse position x: take OCC81GB and labforce from the row with PERNUM==x,
* spread them over the HHNUMhead group, and copy them to the rows whose positionspouse is x
levelsof positionspouse, local(positions)
foreach x of local positions {
	gen tempOCC`x'=OCC81GB if PERNUM==`x'
	* min() over the group returns the non-missing value held by the PERNUM==x row(s)
	bys HHNUMhead: egen OCC81spouse`x'=min(tempOCC`x')
	replace OCC81spouse=OCC81spouse`x' if positionspouse==`x'
	gen templabforce`x'=labforce if PERNUM==`x'
	bys HHNUMhead: egen labforcespouse`x'=min(templabforce`x')
	replace labforcespouse=labforcespouse`x' if positionspouse==`x'
	* Remove this iteration's helpers (-drop temp*- removes every variable whose name starts with temp)
	drop temp* OCC81spouse`x' labforcespouse`x'
}

* Main analysis file for this stage
save "$NAPP/MolaModified_NAPP_JAG4new",replace

* Placebo version: attach the placebo point file by objectid_1 and keep only rows found in both
merge m:1 objectid_1 using  "$MOLA/ll_point1881_soc_civ_nearplacebo", gen(mergeplacebo)
keep if mergeplacebo==3
save "$NAPP/MolaModified_NAPP_JAG4placebonew",replace


/*======================================*/
// 7. Create data to export into ArcGIS //
/*======================================*/

set more off
use "$NAPP/MolaModified_NAPP_JAG4new", clear

* Short label texts kept in local macros. They are reused after each -collapse- further down,
* because -collapse- replaces the variable labels.

* label#    : first 5 characters of the ORDERGB value label, used for occupation# / peroccupation#
* labelnew# : text used for occupationnew# / peroccupationnew#.
*             Fix: this macro was referenced by the labelling loops but never defined, which left
*             the occupationnew# columns without a label and gave every peroccupationnew# column
*             the same one-letter label "p". It is now the short ORDERGB label plus "new", so the
*             headers are non-empty and distinct from those of occupation#.
levelsof ORDERGB, local(Occupation)
foreach o of local Occupation {
	di `o'
	local label`o': label (ORDERGB) `o' 5
	local labelnew`o' "`label`o''new"
}

* labelclass# : first 5 characters of the classgb value label
levelsof classgb, local(Class)
foreach o of local Class {
	di `o'
	local labelclass`o': label (classgb) `o' 5
}

* labelclassnew# : first 6 characters of the classnew value label, looked up for the classgb levels
foreach o of local Class {
	di `o'
	local labelclassnew`o': label (classnew) `o' 6
}

* Age-band dummies: dage1-dage15 cover the 5-year bands [0,5), [5,10), ... [70,75);
* each is missing when AGE is 999 or missing
local i=1
forval x=0(5)70 {
	local y=`x'+5
	gen dage`i'=(AGE>=`x' & AGE<`y')
	replace dage`i'=. if (AGE==999 | AGE==.)
	label var dage`i' "AGE>=`x' & AGE<`y'"
	local i=`i'+1
	di `i'
}

* After the loop `i' is 16, so this creates dage16 for AGE>=75
gen dage`i'=(AGE>=75)
replace dage`i'=. if (AGE==999 | AGE==.)
label var dage`i' "AGE>=75"
* servants = SERVANGB, missing where SERVANGB is 99
gen servants=SERVANGB if SERVANGB!=99
* female = 1 for SEX==2, missing for SEX==9
gen female=(SEX==2)
replace female=. if SEX==9
* married = 1 for MARST below 4, missing for MARST==9
gen married=(MARST<4)
replace married=. if MARST==9
* Indicators built from the migrant codes; all three are set to missing for migrant==6 below
gen ukborn=(migrant<4)
*no migrant parish
gen nomigrantp=(migrant<=1)
*no migrant county
gen nomigrantc=(migrant<=2)
replace ukborn=. if migrant==6
replace nomigrantp=. if migrant==6
replace nomigrantc=. if migrant==6
* Indicators built from nativity; missing for nativity==9
gen irborn=(nativity==2)
replace irborn=. if nativity==9
gen foreborn=(nativity==2 | nativity==3 | nativity==8)
replace foreborn=. if nativity==9

* Household counts: code 99 becomes missing and the variable names are switched to lower case
foreach var of varlist UNMARKID UNMARSON UNMARDAU MARRYDAU MARRYSON {
	mvdecode `var', mv(99)
	rename `var', lower
}

* labforce becomes a 0/1 indicator: 9 -> missing, 2 -> 0
mvdecode labforce, mv(9)
recode labforce (2=0)

*Defining our sample universe
keep if AGE>=15 & AGE<=60

*drop if labforce==.
drop if employednew==.
* residential = 1 when INSTITGB is 1 or 5
gen residential=(INSTITGB==1 | INSTITGB==5)
* SEX becomes 1/0 (code 2 -> 0) with code 9 as missing; female was derived from the raw codes above
recode SEX (1=1) (2=0) (9=.)

*To recover some institutional locations
* Works on a temporary copy of the data (preserve/restore): one category per non-residential
* serial is derived and exported to Excel.
preserve

* Non-residential rows with a known category and SEX==1
keep if residential==0
drop if category==0
keep if SEX==1

* Selected occupation strings currently in category 20 are moved to category 5 ...
foreach occ in "BA STUDENT" "GERMAN STUDENT" "SCHOLAR" "SCHOLAR AT COLLEGE" ///
	"SCHOLAR WESTMINSTER SCHOOL" "STUDENT" "STUDENT (ASSOCIAT PHRMACULICAL SOCIETY)" ///
	"STUDENT (MED)" "STUDENT IN ARTS" "UNDERGRADUATE" "UNDERGRADUATE BALLIOL COL" ///
	"UNDERGRADUATE CAMBRIDGE" "UNDERGRADUATE OXFORD STUDENT" {
	replace category = 5 if OCCSTRNG == "`occ'" & category == 20
}
* ... or to category 1
foreach occ in "BARONET" "BARONETS SON" "CAPTAIN OF FRENCH CALVARY" "CAPTAIN PRUSSIAN ARMY" ///
	"CONSUL (24)" "CONSUL (NO OCC)" "CONSUL FOR DENMARK (FOREIGN)(NO OCCU)" "COURT" {
	replace category = 1 if OCCSTRNG == "`occ'" & category == 20
}
drop if category==20	//because we don't know how do we get this info

* Category of the serial: the modal category of its rows; when there is no single mode,
* fall back on the category of the row with the lowest pernum
bys serial: egen categorymod = mode(category)
bys serial: egen minpernum = min(pernum)
gen categoryone = category if pernum==minpernum		//assuming that the first to answer the survey, that also is in the labour market, is the one defining the category of the institution
bys serial: egen categoryonemod = min(categoryone)
replace categorymod = categoryonemod if categorymod==.
label val categorymod cat
drop if categorymod==.

* total = number of rows in the serial with a non-missing labforce
bys serial : egen total = count(labforce)

* One row per map point and serial
duplicates drop objectid_1 serial, force
keep objectid_1 serial categorymod total
label var objectid_1 objectid_1
label var serial serial

* Coarser grouping of the categories (group names in value label catr).
* Category 9 and any value not listed stay at 0.
* NOTE(review): category 16 ("Technical") is placed in group 2 ("Food dealer") - confirm this is intended
gen categoryreduced = 0
replace categoryreduced = 1 if inlist(categorymod, 17, 19)
replace categoryreduced = 2 if inlist(categorymod, 16, 18)
replace categoryreduced = 3 if inlist(categorymod, 13, 14)
replace categoryreduced = 4 if inlist(categorymod, 10, 12)
replace categoryreduced = 5 if inlist(categorymod, 6, 8, 15)
replace categoryreduced = 6 if inlist(categorymod, 5, 7)
replace categoryreduced = 7 if categorymod == 11
replace categoryreduced = 8 if categorymod == 1
replace categoryreduced = 9 if inlist(categorymod, 2, 3, 4)

label def catr 0 "Domestic" 1 "Builder and Artisan" 2 "Food dealer" 3 "Agriculture" 4 "Services" ///
	5 "Arts, Media and sports" 6 "Education" 7 "Sales" 8 "Government" 9 "Professional", add
label val categoryreduced catr

rename categorymod category

export excel "$NAPP/MolaModified_NAPP_Inst_JAG1new", replace firstrow(varlabels) sheet("Inst_JAG1") //firstrow(variables)

restore

*To keep only the residential observations we will use for the analysis
* These two drops are applied before -preserve-, so they also hold for everything after -restore-
drop if residential==0
drop if  HHservantunknown==1
set more off

* ---- Household heads: per-point totals and means, exported for mapping ----
* Everything between preserve and restore works on a temporary copy of the data.
preserve

* Rows with unknown age, sex, marital status or nativity are left out
drop if AGE==999 | AGE==.
drop if SEX==.
drop if MARST==9
drop if nativity==9	
	
* Same RELAGB codes that define hhhead earlier in this script
keep if RELAGB==10 | RELAGB==11 | RELAGB==12 | RELAGB==2010 | RELAGB==3010 | RELAGB==4010 | RELAGB==5010
* The map file is keyed on OBJECTID_1 (upper case); keep only rows found in both files
gen OBJECTID_1=objectid_1
merge  m:1 OBJECTID_1 using "$MOLA/ll_point1881_soc_civ_near", gen(mergemap)
keep if mergemap==3
order longitude latitude OBJECTID_1
gen foreign=. 
replace foreign= 1 if nativity==3 | nativity==2 | nativity==8
replace foreign=0 if nativity==1
drop if foreign==1 | nativity==9		//because we don't know where they were born
save  "$NAPP/MolaModified_NAPP_Heads_JAG1_mapnew.dta", replace

keep inactive-dage16 objectid_1 female tot_child servants married ukborn irborn foreborn migrant nomigrant* unmarson unmardau marrydau marryson AGE civil
*export excel "$NAPP/MolaModified_NAPP_JAGAll", replace firstrow(variables)
order objectid_1 inactive-dage16 female tot_child servants married ukborn irborn foreborn migrant nomigrant* unmarson unmardau marrydau marryson
outsheet occupation1-dage16 servants objectid_1 using "$NAPP/MolaModified_NAPP_Heads_JAG1new.txt", nolabel replace

* Fix: the classnew# / perclassnew# columns used to be labelled by looping over the classgb levels,
* so classnew dummies whose number is not a classgb level (e.g. 4 and 7) kept the default labels
* written by -collapse-. The short label is now looked up for every classnew# dummy present,
* before -collapse- runs, while classnew still carries its value label.
foreach v of varlist classnew? {
	local o = substr("`v'", 9, 1)
	local labelclassnew`o' : label (classnew) `o' 6
}

* per<var> is a copy of each variable: the original is summed and the copy averaged by -collapse-
foreach var of varlist inactive-dage16 female tot_child servants married ukborn irborn foreborn nomigrant* unmarson unmardau marrydau marryson {
		gen per`var'=`var'
}

* One row per map point: sums of the originals, means of the per* copies and of civil
collapse (sum) inactive-dage16 female tot_child servants married ukborn irborn foreborn nomigrant* unmarson unmardau marrydau marryson (mean) perinactive-perdage16 perfemale-permarryson civil, by(objectid_1)

* -collapse- replaced the variable labels, so they are written again from the stored macros;
* per* columns get the same text prefixed with "p"
foreach o of local Occupation {
		label variable occupation`o' "`label`o''"
		label variable peroccupation`o' "p`label`o''"
}
	
foreach o of local Class {
		label variable class`o' "`labelclass`o''"
		label variable perclass`o' "p`labelclass`o''"
}
	
* Uses the labelnew# macros; where such a macro is empty the variable label is cleared
foreach o of local Occupation {
		label variable occupationnew`o' "`labelnew`o''"
		label variable peroccupationnew`o' "p`labelnew`o''"
}
	
foreach v of varlist classnew? {
		local o = substr("`v'", 9, 1)
		label variable classnew`o' "`labelclassnew`o''"
		label variable perclassnew`o' "p`labelclassnew`o''"
}

* Age bands dage1-dage15, then the open-ended dage16 (`i' is 16 after the loop)
local i=1
forval x=0(5)70 {
		local y=`x'+5
		label var dage`i' "AGE`x'to`y'"
		label var perdage`i' "pAGE`x'to`y'"
		local i=`i'+1
		di `i'
}
	
label var dage`i' "AGE75"
label var perdage`i' "pAGE75"
label var occupied Occupied
label var unemployed Unemployed
label var employednew Employednew
label var tot_child Children
label var servants Servants
label var inactive Inactive
label var retired Retired
label var peroccupied pOccupied
label var perunemployed pUnemployed
label var peremployednew pEmployednew
label var pertot_child pChildren
label var perservants pServants
label var perinactive pInactive
label var perretired pRetired
	
* res<var>: residual of the per-point mean after absorbing civil fixed effects
foreach var of varlist labforce class1 class2 class3 class5 female tot_child servants married nomigrant* peanew employednew ///
		classnew0 classnew1 classnew2 classnew3 classnew4 classnew5 classnew6 classnew7 {
	qui areg per`var', absorb(civil)
	predict res`var', resid
	label var res`var' r`var'
}
label var restot_child rChildren
label var resservants rServants
	
* Remaining columns are labelled with their own name (and "p" + name for the per* copy)
foreach var of varlist s social_c ss tot_social cc tot_civil nativity nomigrant* category classgb labforce classnew employednew female married ///
	ukborn irborn foreborn nomigrant* unmarson unmardau marrydau marryson positionspouse OCC81spouse labforcespouse {
	label var `var' `var' 
	label var per`var' p`var'
	}
cap drop CLASSGB
cap drop perCLASSGB
label var objectid_1 objectid_1
	
export excel "$NAPP/MolaModified_NAPP_Heads_JAG1new", replace firstrow(varlabels) sheet("HEADS_JAG1") //firstrow(variables)

restore

* ---- All remaining observations: per-point totals and means, exported for mapping ----
keep inactive-dage16 objectid_1 female tot_child servants married ukborn irborn foreborn nomigrant* unmarson unmardau marrydau marryson AGE civil
order objectid_1 inactive-dage16 female tot_child servants married ukborn irborn foreborn nomigrant* unmarson unmardau marrydau marryson
outsheet occupation1-dage16 servants objectid_1 using "$NAPP/MolaModified_NAPP_ALL_JAG1new.txt", nolabel replace

* Fix: the classnew# / perclassnew# columns used to be labelled by looping over the classgb levels,
* so classnew dummies whose number is not a classgb level (e.g. 4 and 7) kept the default labels
* written by -collapse-. The short label is now looked up for every classnew# dummy present,
* before -collapse- runs, while classnew still carries its value label.
foreach v of varlist classnew? {
	local o = substr("`v'", 9, 1)
	local labelclassnew`o' : label (classnew) `o' 6
}

* per<var> is a copy of each variable: the original is summed and the copy averaged by -collapse-
foreach var of varlist inactive-dage16 female tot_child servants married ukborn irborn foreborn nomigrant* unmarson unmardau marrydau marryson {
	gen per`var'=`var'
}

* One row per map point: sums of the originals, means of the per* copies and of civil
collapse (sum) inactive-dage16 female tot_child servants married ukborn irborn foreborn nomigrant* unmarson unmardau marrydau marryson ///
(mean) perinactive-perdage16 perfemale-permarryson civil, by(objectid_1)

* -collapse- replaced the variable labels, so they are written again from the stored macros;
* per* columns get the same text prefixed with "p"
foreach o of local Occupation {
	label variable occupation`o' "`label`o''"
	label variable peroccupation`o' "p`label`o''"
}
foreach o of local Class {
	label variable class`o' "`labelclass`o''"
	label variable perclass`o' "p`labelclass`o''"
}

* Uses the labelnew# macros; where such a macro is empty the variable label is cleared
foreach o of local Occupation {
	label variable occupationnew`o' "`labelnew`o''"
	label variable peroccupationnew`o' "p`labelnew`o''"
}
foreach v of varlist classnew? {
	local o = substr("`v'", 9, 1)
	label variable classnew`o' "`labelclassnew`o''"
	label variable perclassnew`o' "p`labelclassnew`o''"
}

* Age bands dage1-dage15, then the open-ended dage16 (`i' is 16 after the loop)
local i=1
forval x=0(5)70 {
	local y=`x'+5
	label var dage`i' "AGE`x'to`y'"
	label var perdage`i' "pAGE`x'to`y'"
	local i=`i'+1
	di `i'
}
label var dage`i' "AGE75"
label var perdage`i' "pAGE75"
label var occupied Occupied
label var unemployed Unemployed
label var employednew Employednew
label var tot_child Children
label var servants Servants
label var inactive Inactive
label var retired Retired
label var peroccupied pOccupied
label var perunemployed pUnemployed
label var peremployednew pEmployednew
label var pertot_child pChildren
label var perservants pServants
label var perinactive pInactive
label var perretired pRetired

* Remaining columns are labelled with their own name (and "p" + name for the per* copy)
foreach var of varlist s social_c ss tot_social cc tot_civil nativity nomigrant* category classgb labforce classnew employednew female married ///
ukborn irborn foreborn nomigrant* unmarson unmardau marrydau marryson positionspouse OCC81spouse labforcespouse {
	label var `var' `var' 
	label var per`var' p`var'
}

* res<var>: residual of the per-point mean after absorbing civil fixed effects
foreach var of varlist labforce class1 class2 class3 class5 female tot_child servants married nomigrant* peanew employednew ///
		classnew0 classnew1 classnew2 classnew3 classnew4 classnew5 classnew6 classnew7 {
	qui areg per`var', absorb(civil)
	predict res`var', resid
	label var res`var' r`var'
}
label var restot_child rChildren
label var resservants rServants

cap drop CLASSGB
cap drop perCLASSGB
label var objectid_1 objectid_1
export excel "$NAPP/MolaModified_NAPP_ALL_JAG1new", replace firstrow(varlabels) sheet("ALL_JAG1") //firstrow(variables)


